
#delimit ;

**************************************************************************
   Label           : fim7114_gid_BA_2_BAL_wide
   Rows            : 38348
   Columns         : 10
   ASCII File Date : February 3, 2017
*************************************************************************;


/* Read the fixed-format FIMS map file: the individual's 1968 interview
   number and person number, followed by the same ID pair for the adoptive
   father (P_AF), adoptive mother (P_AM), father (P_F) and mother (P_M).
   Each specification is: variable name, then its column range.
   "clear" replaces whatever data are currently in memory. */
infix
    ER30001        1-4     ER30002        5-7
    ER30001_P_AF   8-11    ER30002_P_AF  12-14
    ER30001_P_AM  15-18    ER30002_P_AM  19-21
    ER30001_P_F   22-25    ER30002_P_F   26-28
    ER30001_P_M   29-32    ER30002_P_M   33-35
    using ./FIMS/FIMSGenerations.txt, clear
;

/* Variable labels. Grouped by ID component: first the 1968 interview
   numbers (individual, then each parent type), then the person numbers. */
label var ER30001      "1968 INTERVIEW NUMBER" ;
label var ER30001_P_AF "1968 INTERVIEW NUMBER /PARENT /ADOPTIVE FATHER" ;
label var ER30001_P_AM "1968 INTERVIEW NUMBER /PARENT /ADOPTIVE MOTHER" ;
label var ER30001_P_F  "1968 INTERVIEW NUMBER /PARENT /FATHER" ;
label var ER30001_P_M  "1968 INTERVIEW NUMBER /PARENT /MOTHER" ;

label var ER30002      "PERSON NUMBER 68" ;
label var ER30002_P_AF "PERSON NUMBER 68 /PARENT /ADOPTIVE FATHER" ;
label var ER30002_P_AM "PERSON NUMBER 68 /PARENT /ADOPTIVE MOTHER" ;
label var ER30002_P_F  "PERSON NUMBER 68 /PARENT /FATHER" ;
label var ER30002_P_M  "PERSON NUMBER 68 /PARENT /MOTHER" ;

#delimit cr

/* Sample restriction, following the Mazumder paper: we only want certain
   parts of the PSID, selected by the 1968 interview number (ER30001).
     - SRC: the main nationally representative sample (values below 3000).
     - SEO: used in robustness checks (values greater than 5000 but less
       than 7000).
   Not kept: the immigrant samples (values between 3000 and 5000) and the
   Latino families that were added in (values above 7000).
   https://psidonline.isr.umich.edu/guide/faq.aspx */
	gen src_sample = (ER30001 < 3000)
	gen seo_sample = (ER30001 < 7000 & ER30001 > 5000)
	* Drop every record that is in neither SRC nor SEO (this can be changed)
	drop if src_sample != 1 & seo_sample != 1

* Person-level IDs are built as (1968 interview number x 1000) + person number.
* They are stored explicitly as long: the default float type only represents
* integers exactly up to 16,777,216, so long keeps the IDs exact integers
* no matter how large the interview numbers are.

* ID that we will use for son (note this includes daughters for now)
	gen long son_id = ER30001*1000 + ER30002

* ID that we will use for father: the father's ID, overridden by the adoptive
* father's ID when one is recorded. Both ID components must be present before
* an ID is assigned, so an incomplete adoptive record can never overwrite a
* valid father ID with missing.
	gen long father_id = .
	replace father_id = ER30001_P_F*1000 + ER30002_P_F if !missing(ER30001_P_F, ER30002_P_F)
	replace father_id = ER30001_P_AF*1000 + ER30002_P_AF if !missing(ER30001_P_AF, ER30002_P_AF)

* ID that we will use for mother: same rule as for father_id above.
* Original author's note: there are only 74 cases where the bio mom and the
* adoptive mom IDs are both non-missing; in those cases the ID of the adoptive
* mom is assigned (same as with the father ID).
	gen long mother_id = .
	replace mother_id = ER30001_P_M*1000 + ER30002_P_M if !missing(ER30001_P_M, ER30002_P_M)
	replace mother_id = ER30001_P_AM*1000 + ER30002_P_AM if !missing(ER30001_P_AM, ER30002_P_AM)

	* Work on a temporary copy of the data to write two child-parent link
	* files; the full dataset is brought back by "restore" at the end.
	preserve
		/* Original author's note: it is not necessary to drop records with
		   both father_id and mother_id missing, because every kid in the
		   file can be matched to at least one parent. */
		keep son_id father_id mother_id

		* Indicators for which parent links are available
		gen momlink_only     = !missing(mother_id) & missing(father_id)
		gen link_bothparents = !missing(mother_id) & !missing(father_id)
		label var momlink_only     "R can only be linked to mom"
		label var link_bothparents "R can be linked to both parents"

		* File 1: every kept respondent, with both parent IDs and the flags
		sort son_id
		compress
		save "./FIMS/FIMSEitherparentKids_SRC_SEO.dta", replace

		* File 2: respondents linked to a mother but NOT to a father
		keep if momlink_only == 1
		keep son_id mother_id
		compress
		save "./FIMS/FIMSMothersKids_SRC_SEO.dta", replace
	restore

* Keep only respondents for whom we have a father ID (biological or adoptive)
	keep if !missing(father_id)

* Save the father-kid links (SRC and SEO samples), relevant variables only
	keep son_id father_id
	compress
	save "./FIMS/FIMSFathersKids_SRC_SEO.dta", replace

* One more file that just tells us who the FIMS fathers are, so we can narrow
* the sample of fathers: one row per father_id, with "number" = how many
* linked kids that father has. No missing father_id can remain at this point
* because those records were removed above.
	keep father_id
	gen number = 1
	collapse (sum) number, by(father_id)
	save "./FIMS/FIMSFathers_SRC_SEO.dta", replace
